library(sp)


# geo<-read.csv("XXXXX/Geacron Locations.csv",stringsAsFactors = F)
# geo<-geo[geo$Battle!="",]
# master<-read.csv("XXXXXX/HCED Master Data.csv",stringsAsFactors = F)
# 
# my_locs<-apply(as.matrix(master$ID),1,function(x) gsub('[[:digit:]]+', '', x))
# 
# master<-data.frame(my_locs,master)
# 
# master<-master[master$my_locs!="",]
# 
# names(master)[1]<-"Location"
# 
# clean_locs<-apply(as.matrix(master$Location),1,function(x) gsub("\\s*\\([^\\)]+\\)","",x))
# 
# clean_locs<-apply(as.matrix(clean_locs),1,function(x) gsub("-$","",x))
# 
# master$Location<-clean_locs
# 
# crosswalk<-apply(as.matrix(geo$Battle),1,function(x) match(x,master$Location))
# 
# my_name<-matrix(NA,dim(geo)[1],1)
# my_year<-matrix(NA,dim(geo)[1],1)
# my_lat<-matrix(NA,dim(geo)[1],1)
# my_long<-matrix(NA,dim(geo)[1],1)
# 
# for (i in 1:length(crosswalk)){
#   if (!is.na(crosswalk[i])){
#   my_name[i]<-master$Location[crosswalk[i]]
#   my_year[i]<-master$Year[crosswalk[i]]
#   my_lat[i]<-master$Latitude[crosswalk[i]]
#   my_long[i]<-master$Longitude[crosswalk[i]]
#   }
# }
# 
# out<-data.frame(geo,my_name,my_year,my_lat,my_long)
# 
# ####Export for manual cleaning
# 
# write.csv(out,file="Geocron Merger.csv")


####Post cleaning

# Load the spatial packages one call at a time. library() attaches a single
# package per call; in library(rgeos, sp, sf) the extra names are matched to
# its `help` and `pos` parameters instead of being attached.
# NOTE(review): only sp functions are called in the code below; confirm
# whether rgeos and sf are still needed.
library(rgeos)
library(sp)
library(sf)

# Manually cleaned merge file: one row per battle, holding the Geacron
# coordinates (Longitude/Latitude) next to my own (my_long/my_lat).
data <- read.csv("XXXXX/Geocron Merger.csv", stringsAsFactors = FALSE)

# Rows whose two locations are at least this many km apart get exported.
max_km <- 100

# A distance can only be computed where all four coordinates are present;
# every other row keeps NA.
coord_cols <- c("my_long", "my_lat", "Longitude", "Latitude")
have_both <- rowSums(is.na(data[coord_cols])) == 0

# Great-circle distance in km (spDistsN1 with longlat = TRUE) between my
# point and the Geacron point, one value per row. spDistsN1 takes plain
# longitude/latitude coordinates, so no Spatial objects are needed.
dists <- rep(NA_real_, nrow(data))
dists[have_both] <- vapply(
  which(have_both),
  function(i) {
    spDistsN1(
      cbind(data$my_long[i], data$my_lat[i]),
      c(data$Longitude[i], data$Latitude[i]),
      longlat = TRUE
    )
  },
  numeric(1)
)

# Keep only the rows with a known distance at or above the threshold.
check <- data[!is.na(dists) & dists >= max_km, , drop = FALSE]

write.csv(check, file = "Locations to Check.csv")


##Differences

###Me - Palo Alto, Texas; Geochron - Palo Alto, CA
###Me - Monmouth, PA; Geochron - Monmouth, England
###Me - Samhud, Egypt; Geochron - Samhud, Romania
###Me - Chengpu - see source listed in data file. Just east of the bend in the Yellow River
###Me - Bergen, Hesse; Geochron - Bergen, Norway
###Me - San Juan Puerto Rico; Geochron - San Luis, Cuba
###Me - Elasa, Israel; Geochron - Elasa, Greece
###Me - Khalule, Syria; Geochron - Baghdad, Iraq
###Me - Holowcyn, Belarus (on the Babich); Geochron - Krakow, Poland
###Me - Ituzaingo, Argentina; Geochron - Mangueiras, Brazil
###Me - Noreia, Austria; Geochron - Miage Glacier, Italy
###Me - Biberach an der Riss, Germany; Geochron - Berlin
###Me - Kojima, Chiba Prefecture, Japan; Geochron - Okubo, Okayama, Japan
###Me - Gura'i, Eritrea (near Massawa on the Red Sea); Geochron - Pawatelam, Ethiopia
###Me - Vich, Barcelona, Spain; Geochron - Vichy, France
###Me - Vincy-Reuil-et-Magny, Hauts de France ; Geochron - Les Rues-des-Vignes, France. The latter is close to Cambrai as per Jaques but no place name 'Vincy' nearby
###Me - Minato, Wakayama, 640-8404, Japan. Showalter's description of the battle is as a sortie by the defenders of Wakayama Castle against their besiegers; Geochron - Kashii, Fukuoka, Japan
###Me - Errachidia Province, Morocco, next to Lac Isli; Geochron - Oujda, Morocco
###Me - Irtah, Turkey between Antioch and Aleppo as per Jaques; Geochron - Idlib District, Syria
###Me - Yuryev-Polski District, Russia as per Jaques; Geochron - Lipetsk, Lipetsk Oblast, Russia, 398001
###Me - 5-chome-2-1 Yamazaki, Shimamoto, Mishima District, Osaka 618-0001, Japan - just south of Kyoto as in Jaques ; Geochron - 2 Chome Banshomen, Misato, Saitama 341-0056, Japan
###Me - Massangano, Angola, as in Jaques; Geochron - Kingombe, Angola
###Me - Tekirdag, Turkey; Geochron - Petrich, Bulgaria not close to Edirne/Adrianople
###Me - Off the coast of Sirte, Libya; Geochron - in the Mediterranean far north of Sirte
###Me - Present day Oued Medjerda, Tunisia (formerly Bagradas River, as per Jaques); Geochron - Ras El Oued, Algeria
###Me - Bordj Taguin, Algeria; Geochron - Tigherghar, Algeria
###Me - Nur Selam, Ethiopia between Harrar and Jijiga, Ethiopia, as per Jaques; Geochron - Shekosh, Ethiopia
###Me - Iron Gate of Transylvania as per https://www.britannica.com/place/Caras-Severin#ref1023149 ; Geochron - Prundu, Romania
###Me - Breskens, Netherlands at the mouth of the Scheldt as per Jaques; Geochron - Groningen in the Netherlands
###Me - Elhovo, Bulgaria, northwest of Edirne as per Jaques; Geochron - Sofia, Bulgaria
###Me - Slim Buttes National Forest, SD USA; Geochron - North Hughes, South Dakota
###Me - River Raab in Hungary, as per Jaques; Geochron - Raab in Austria
###Me - Riveretto, Italy; Geochron - Friedberg, Germany
###Me - Larga, Moldavia; Geochron - Causeni District, Moldova
###Me - Sogdian Rock, Uzbekistan; Geochron - Forish District, Uzbekistan
###Me - Foot of Mount Parthenion, as per Jaques; Geochron - Platykampos 400 09, Greece
###Me - Oberseben, Austria; Geochron - Podborany, Czechia
###Me - Nagykáta, 2760 Hungary; Geochron - 2404 Petronell-Carnuntum, Austria
###Me - 85029 Venosa, Province of Potenza, Italy; Geochron - Via Santa Bernadette, 19, 88046 Lamezia Terme CZ, Italy
###Me - Vincennes, Indiana; Geochron - Kaskaskia, IL 62233, USA
###Me - Cape St George, New Ireland; Geochron - North of Bougainville
###Me - Alidas Vej 14, 8700 Horsens, Denmark as per Jaques near Horsens; Geochron - Lund, Sweden
###Me - Ennab, Syria near the Orontes, as per Jaques; Geochron - Palmyra, Syria
###Me - Cao County, Heze, Shandong, China between the Yellow River and Weishan Lake, as per Pankenier; Geochron - Wen County, Henan, Jiaozuo, Henan, China
###Me - 4RMV+6X3 Ostrovno, Belarus, north of Mogilev as per Jaques; Geochron - Minsk
###Me - Borisov, Belarus on the Berezina, as per Jaques; Geochron - Gomel Region, Belarus
###Me - Kuzukue Castle remains, Yokohama, as per Jaques; Geochron - 1127-1 Higashikozakacho, Nagahama, Shiga 526-0802, Japan
###Me - Argos as per Jaques; Geochron - Alizia 300 19, Greece
###Me - Mount Hiuchi, Japan; Geochron - Higashikobaracho, Fukui, 910-2472, Japan
###Me - Cocherel 27120 Houlbec-Cocherel France, west of Paris near Mantes as per Jaques; Geochron - 77440 Cocherel, France
###Me - Cilincing, Java, where the British landed; Geochron - Area Sawah, Karangendep, Patikraja, Banyumas Regency, Central Java, Indonesia
###Me - Ishibashiyama near Odawara as in Jaques; Geochron - Higashino, Nagaizumi, Sunto District, Shizuoka 411-0931, Japan
###Me - Laswari Battlefield near Alwar as in Jaques; Geochron - Mirpur, Uttar Pradesh 283125, India
###Me - Jingxing; Geochron - Wuqiang County, Hengshui, Hebei, China
###Me - 400 miles west of Ushant as in Jaques; Geochron - 400 miles north west of Ushant
###Me - Bennachie, Aberdeenshire as per Jaques; Geochron - Edinburgh
###Me - Vidin, Bulgaria as per Jaques; Geochron - Gulyantzi, Bulgaria
###Me - Boomplaats mountain peak near Hopetown as per Jaques; Geochron - Bergplaats, South Africa
###Me - 7-43 R753, Aughrim Lower, Aughrim, Co. Wicklow, Ireland; Geochron - Attibrassil, Cornfield, Co. Galway, Ireland
###Me - Yanling Village Yichuan County, Henan, Luoyang, Henan, China; Geochron - Jian'An, Xuchang, Henan, China
###Me - Almansurah, Al Yamamah, Al-Kharj 16285, Saudi Arabia; Geochron - Dhurma Saudi Arabia
###Me - Huan Cun Lu, Mu Dan Qu, He Ze Shi, Shan Dong Sheng, China, 274041 as per Jaques; Geochron - Xinxiang, Henan, China
###Me - Caiza "D", Bolivia ; Geochron - Colquechaca, Bolivia
###Me - 1 Rue de l'Hotel de ville, 70400 Héricourt, France near Belfort as in Jaques; Geochron - 21600 Longvic, France
###Me - Monastir, Tunisia as per Jaques; Geochron - Tunis, Tunisia
###Me - Jiangling County, Jingzhou, Hubei, China on the Yangtze as per Jaques; Geochron - Chibi, Xianning, Hubei, China
###Me - Ardabil Province, Aslan Duz, 12, Iran; Geochron - Hajiqabul, Azerbaijan
###Me - Zborov north of Ternopol as per Jaques; Geochron - 1, Zalishchyky, , Ukraine, 48600
###Me - Zafer, 03032 Anitkaya/Afyonkarahisar Merkez/Afyonkarahisar, Turkey near modern Afyon as per Jaques; Geochron - Küçükgökçeli/Isparta Merkez/Isparta, Turkey
###Me - 56140 Vuosalmi, Finland; Geochron - Priozersky District, Leningrad Oblast, Russia
###Me - Yuchang, Hubei, China as per Jaques; Geochron - Shayang County, Jingmen, Hubei, China
###Me - 804 y miraflores, Junín, Peru around 100 miles NE of Lima as per Jaques; Geochron - Comas, 12220, Peru
###Me - Schwetzingen, Baden-Württemberg, Germany, as per Jaques; Geochron - 72172 Sulz, Germany
###Me - Shikoku Island, Japan; Geochron - Wada, Tosa, Tosa District, Kochi 781-3409, Japan also on Shikoku Island
###Me - Sauchieburn; Geochron - Stirling
###Me - Marchfeld, Austria; Geochron - Jedenspeigen, 2264, Austria
###Me - Puerto Boyacá, Boyaca, Colombia near Tunja as in Jaques; Geochron - Samacá, Boyaca, Colombia
###Me - Azar collage, Lucknow, Uttar Pradesh 226001, India; Geochron - Katkusama, Uttar Pradesh, India
###Me - Hajnice, Czechia; Geochron - Prague, Czechia
###Me - Bitola, Macedonia as per Jaques; Geochron - Skopje, Macedonia
###Me - Nesjarveien, Larvik; Geochron - Ullern, 0377 Oslo, Norway
###Me - Jalalapur Sharif, Pakistan, near the Jhelum River as per Jaques ; Geochron - Thatha Kheru, Hafizabad, Punjab, Pakistan
###Me - River Sit, Russia ; Geochron - Bolsheselsky District, Yaroslavl Oblast, Russia
###Me - Changping District, Beijing, China; Geochron - Changpingyuan, Gao Ping Shi, Jin Cheng Shi, China, 048400
###Me - V3R2+2PH Thiriyai, Sri Lanka; Geochron - 4HWC+2CM Vakarai, Sri Lanka



# dincecco<-read.csv("XXXXX/Dincecco Coordinates.csv")
# 
# crosswalk<-apply(as.matrix(dincecco$names),1,function(x) match(x,data$Battle))
# 
# cm_name<-matrix(NA,dim(dincecco)[1],1)
# cm_year<-matrix(NA,dim(dincecco)[1],1)
# cm_lat<-matrix(NA,dim(dincecco)[1],1)
# cm_long<-matrix(NA,dim(dincecco)[1],1)
# 
# for (i in 1:length(crosswalk)){
#   if (!is.na(crosswalk[i])){
#     cm_name[i]<-data$my_name[crosswalk[i]]
#     cm_year[i]<-data$my_year[crosswalk[i]]
#     cm_lat[i]<-data$my_lat[crosswalk[i]]
#     cm_long[i]<-data$my_long[crosswalk[i]]
#   }
# }
# 
# ###Only 330 matched. Export for cleaning###
# 
# out<-data.frame(dincecco,cm_name,cm_year,cm_lat,cm_long)
# 
# write.csv(out,file="Dincecco Merger.csv")

###Post Cleaning###

# Manually cleaned merge file: one row per Dincecco battle, holding its own
# coordinates (longitude/latitude) next to the matched ones (cm_long/cm_lat).
data <- read.csv("Dincecco Merger.csv")

# Number of largest-distance rows exported for manual inspection.
n_worst <- 31L

# A distance can only be computed where all four coordinates are present;
# every other row keeps NA.
coord_cols <- c("cm_long", "cm_lat", "longitude", "latitude")
have_both <- rowSums(is.na(data[coord_cols])) == 0

# Great-circle distance in km (spDistsN1 with longlat = TRUE) between the
# matched point and the Dincecco point, one value per row. spDistsN1 takes
# plain longitude/latitude coordinates, so no Spatial objects are needed.
dists <- rep(NA_real_, nrow(data))
dists[have_both] <- vapply(
  which(have_both),
  function(i) {
    spDistsN1(
      cbind(data$cm_long[i], data$cm_lat[i]),
      c(data$longitude[i], data$latitude[i]),
      longlat = TRUE
    )
  },
  numeric(1)
)

inspection <- data.frame(data, dists)

# Largest distances first (order() puts NA distances last). head() caps the
# selection at the rows that exist, so a table shorter than n_worst does not
# produce NA indices and the all-NA rows they would create.
worst <- head(order(-dists), n_worst)

write.csv(inspection[worst, ], file = "Dincecco To Check.csv")
